%% Manylabs benchmark analysis
% Replicates Figure 2 top row, Table 1, Figure 3, Figure 4, 
% Figure A-2 top row, Figure A-3, Figure A-4, Figure A-5

%% Parametrization
% Number of bins for PIT histograms
bin_num = 5;

% Set max iteration number and tolerance
max_iter = 5000;
tol = 0.0001;

% Random seed
seed = 101;

% Sample size of simulated coverage & S stats
sim_cov = 5000;
sim_S = 5000;

%% Case 1: N=66, J=5 (All effects included, nu estimated by EB)
N = 66;
J = 5;
highlight_indices = [37:42, 49:60];

% Extract study estimates and ses
sheet1 = 'N=66, J=5';
estimates1 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'C3:H68'); 
se1 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'N3:S68');   
size1 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'Y3:AD68'); 

% Randomly assign baseline and validation studies
[hat_theta_1, hat_vartheta_1, rep_base_se2_1, rep_val_se2_1, size_base_1, size_val_1] = random_assignment(estimates1, se1, size1, seed);

% Estimate nu from pooled data using two methods
EB_est_nu_1 = MLE_nu(hat_theta_1, rep_base_se2_1, 0, max_iter, tol);

% Compute posterior means and variances for tau
[bar_tau_1, bar_V_tau_1] = compute_posterior_moments(hat_theta_1, rep_base_se2_1, EB_est_nu_1);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_1, empirical_cov_1, cov_p_value_1] = check_CI(hat_vartheta_1, rep_val_se2_1, bar_tau_1, bar_V_tau_1, EB_est_nu_1, cov_stat_sim);
xtick = 0:10:70;
ytick = -4:1:3;
[interval_1, order_1] = plot_intervals(hat_vartheta_1, bar_tau_1, predict_se_1, highlight_indices, xtick, ytick);
saveas(interval_1, "../Results/EB_Interval_N_66_J_5.png")

% Generate the S statistics and p-value
S_stat_sim_1 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_1, S_p_value_1, PIT_plot_1] = PITs(hat_vartheta_1, bar_tau_1, predict_se_1, S_stat_sim_1, bin_num, ytick);
saveas(PIT_plot_1, "../Results/EB_PIT_N_66_J_5.png")

% Plot the ratio of nu to se
yscale_rat = [0, 5];
yscale_rat_asym = [0, 0.7];
nu_se_fig_1 = plot_nu_se_ratio(EB_est_nu_1, rep_base_se2_1, rep_val_se2_1, yscale_rat, xtick);
nu_asymse_fig_1 = plot_nu_asymse_ratio(EB_est_nu_1, rep_base_se2_1, rep_val_se2_1, size_base_1, size_val_1, yscale_rat_asym, xtick);
saveas(nu_se_fig_1, "../Results/nu_se_ratio_N_66_J_5.png")
saveas(nu_asymse_fig_1, "../Results/nu_asymse_ratio_N_66_J_5.png")

%% Case 2: N=132, J=2 (All effects included, nu estimated by EB)
N = 132;
J = 2;
highlight_indices = [73:84, 97:120];

% Extract study estimates and ses
sheet2 = 'N=132, J=2';
estimates2 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'C3:E134'); 
se2 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'K3:M134');  
size2 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'S3:U134'); 

% Randomly assign baseline and validation studies
[hat_theta_2, hat_vartheta_2, rep_base_se2_2, rep_val_se2_2, size_base_2, size_val_2] = random_assignment(estimates2, se2, size2, seed);

% Estimate nu from pooled data
EB_est_nu_2 = MLE_nu(hat_theta_2, rep_base_se2_2, 0, max_iter, tol);

% Compute posterior means and variances for tau
[bar_tau_2, bar_V_tau_2] = compute_posterior_moments(hat_theta_2, rep_base_se2_2, EB_est_nu_2);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_2, empirical_cov_2, cov_p_value_2] = check_CI(hat_vartheta_2, rep_val_se2_2, bar_tau_2, bar_V_tau_2, EB_est_nu_2, cov_stat_sim);
xtick = 0:20:140;
ytick = -4:1:3;
[interval_2, order_2] = plot_intervals(hat_vartheta_2, bar_tau_2, predict_se_2, highlight_indices, xtick, ytick);
saveas(interval_2, "../Results/EB_Interval_N_132_J_2.png")

% Generate the S statistics and p-value
S_stat_sim_2 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.3;
[S_stat_2, S_p_value_2, PIT_plot_2] = PITs(hat_vartheta_2, bar_tau_2, predict_se_2, S_stat_sim_2, bin_num, ytick);
saveas(PIT_plot_2, "../Results/EB_PIT_N_132_J_2.png")

% Plot the ratio of nu to se
yscale_rat = [0, 5];
yscale_rat_asym = [0, 0.7];
nu_se_fig_2 = plot_nu_se_ratio(EB_est_nu_2, rep_base_se2_2, rep_val_se2_2, yscale_rat, xtick);
nu_asymse_fig_2 = plot_nu_asymse_ratio(EB_est_nu_2, rep_base_se2_2, rep_val_se2_2, size_base_2, size_val_2, yscale_rat_asym, xtick);
saveas(nu_se_fig_2, "../Results/nu_se_ratio_N_132_J_2.png")
saveas(nu_asymse_fig_2, "../Results/nu_asymse_ratio_N_132_J_2.png")

%% Case 3: N=48, J=5 (three irreplicable studies excluded, nu estimated by EB)
N = 48;
J = 5;
highlight_indices = [];

% Extract study estimates and ses
sheet3 = 'N=48, J=5';
estimates3 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'C3:H50'); 
se3 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'N3:S50');   
size3 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'Y3:AD50');

% Randomly assign baseline and validation studies
[hat_theta_3, hat_vartheta_3, rep_base_se2_3, rep_val_se2_3, size_base_3, size_val_3] = random_assignment(estimates3, se3, size3, seed);

% Estimate nu from pooled data
EB_est_nu_3 = MLE_nu(hat_theta_3, rep_base_se2_3, 0, max_iter, tol);

% Compute posterior means and variances for tau
[bar_tau_3, bar_V_tau_3] = compute_posterior_moments(hat_theta_3, rep_base_se2_3, EB_est_nu_3);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_3, empirical_cov_3, cov_p_value_3] = check_CI(hat_vartheta_3, rep_val_se2_3, bar_tau_3, bar_V_tau_3, EB_est_nu_3, cov_stat_sim);
xtick = 0:10:50;
ytick = -4:1:3;
[interval_3, order_3] = plot_intervals(hat_vartheta_3, bar_tau_3, predict_se_3, highlight_indices, xtick, ytick);
saveas(interval_3, "../Results/EB_Interval_N_48_J_5.png")

% Generate the S statistics and p-value
S_stat_sim_3 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_3, S_p_value_3, PIT_plot_3] = PITs(hat_vartheta_3, bar_tau_3, predict_se_3, S_stat_sim_3, bin_num, ytick);
saveas(PIT_plot_3, "../Results/EB_PIT_N_48_J_5.png")

% Plot the ratio of nu to se
yscale_rat = [0, 6];
yscale_rat_asym = [0, 0.7];
nu_se_fig_3 = plot_nu_se_ratio(EB_est_nu_3, rep_base_se2_3, rep_val_se2_3, yscale_rat, xtick);
nu_asymse_fig_3 = plot_nu_asymse_ratio(EB_est_nu_3, rep_base_se2_3, rep_val_se2_3, size_base_3, size_val_3, yscale_rat_asym, xtick);
saveas(nu_se_fig_3, "../Results/nu_se_ratio_N_48_J_5.png")
saveas(nu_asymse_fig_3, "../Results/nu_asymse_ratio_N_48_J_5.png")

%% Case 4: N=96, J=2 (three irreplicable studies excluded, nu estimated by EB)
N = 96;
J = 2;
highlight_indices = [];

% Extract study estimates and ses
sheet4 = 'N=96, J=2';
estimates4 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'C3:E98'); 
se4 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'K3:M98');   
size4 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'S3:U98'); 

% Randomly assign baseline and validation studies
[hat_theta_4, hat_vartheta_4, rep_base_se2_4, rep_val_se2_4, size_base_4, size_val_4] = random_assignment(estimates4, se4, size4, seed);

% Estimate nu from pooled data
EB_est_nu_4 = MLE_nu(hat_theta_4, rep_base_se2_4, 0, max_iter, tol);

% Compute posterior means and variances for tau
[bar_tau_4, bar_V_tau_4] = compute_posterior_moments(hat_theta_4, rep_base_se2_4, EB_est_nu_4);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_4, empirical_cov_4, cov_p_value_4] = check_CI(hat_vartheta_4, rep_val_se2_4, bar_tau_4, bar_V_tau_4, EB_est_nu_4, cov_stat_sim);
xtick = 0:20:100;
ytick = -4:1:3;
[interval_4, order_4] = plot_intervals(hat_vartheta_4, bar_tau_4, predict_se_4, highlight_indices, xtick, ytick);
saveas(interval_4, "../Results/EB_Interval_N_96_J_2.png")

% Generate the S statistics and p-value
S_stat_sim_4 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.3;
[S_stat_4, S_p_value_4, PIT_plot_4] = PITs(hat_vartheta_4, bar_tau_4, predict_se_4, S_stat_sim_4, bin_num, ytick);
saveas(PIT_plot_4, "../Results/EB_PIT_N_96_J_2.png")

% Plot the ratio of nu to se
yscale_rat = [0, 6];
yscale_rat_asym = [0, 0.7];
nu_se_fig_4 = plot_nu_se_ratio(EB_est_nu_4, rep_base_se2_4, rep_val_se2_4, yscale_rat, xtick);
nu_asymse_fig_4 = plot_nu_asymse_ratio(EB_est_nu_4, rep_base_se2_4, rep_val_se2_4, size_base_4, size_val_4, yscale_rat_asym, xtick);
saveas(nu_se_fig_4, "../Results/nu_se_ratio_N_96_J_2.png")
saveas(nu_asymse_fig_4, "../Results/nu_asymse_ratio_N_96_J_2.png")

%% Summarize the EB results
EB_est_nu = [EB_est_nu_1, EB_est_nu_2, EB_est_nu_3, EB_est_nu_4]
EB_empirical_cov = [empirical_cov_1, empirical_cov_2, empirical_cov_3, empirical_cov_4]
EB_cov_p_value = [cov_p_value_1, cov_p_value_2, cov_p_value_3, cov_p_value_4]
EB_S_stat = [S_stat_1, S_stat_2, S_stat_3, S_stat_4]
EB_S_p_value = [S_p_value_1, S_p_value_2, S_p_value_3, S_p_value_4]

%% Case 5: N=66, J=5 (All effects included, nu=0)
N = 66;
J = 5;
highlight_indices = [37:42, 49:60];

% Extract study estimates and ses
sheet1 = 'N=66, J=5';
estimates5 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'C3:H68'); 
se5 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'N3:S68');   
size5 = xlsread('../Data/ManyLabs_Data.xlsx', sheet1, 'Y3:AD68'); 

% Randomly assign baseline and validation studies
[hat_theta_5, hat_vartheta_5, rep_base_se2_5, rep_val_se2_5, size_base_5, size_val_5] = random_assignment(estimates5, se5, size5, seed);

% Compute posterior means and variances for tau
[bar_tau_5, bar_V_tau_5] = compute_posterior_moments(hat_theta_5, rep_base_se2_5, 0);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_5, empirical_cov_5, cov_p_value_5] = check_CI(hat_vartheta_5, rep_val_se2_5, bar_tau_5, bar_V_tau_5, 0, cov_stat_sim);
xtick = 0:10:70;
ytick = -3:1:3;
[interval_5, order_5] = plot_intervals(hat_vartheta_5, bar_tau_5, predict_se_5, highlight_indices, xtick, ytick);
saveas(interval_5, "../Results/Benchmark_Interval_N_66_J_5.png")

% Generate the S statistics and p-value
S_stat_sim_5 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_5, S_p_value_5, PIT_plot_5] = PITs(hat_vartheta_5, bar_tau_5, predict_se_5, S_stat_sim_5, bin_num, ytick);
saveas(PIT_plot_5, "../Results/Benchmark_PIT_N_66_J_5.png")


%% Case 6: N=132, J=2 (All effects included, nu=0)
N = 132;
J = 2;
highlight_indices = [73:84, 97:120];

% Extract study estimates and ses
sheet2 = 'N=132, J=2';
estimates6 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'C3:E134'); 
se6 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'K3:M134');  
size6 = xlsread('../Data/ManyLabs_Data.xlsx', sheet2, 'S3:U134'); 

% Randomly assign baseline and validation studies
[hat_theta_6, hat_vartheta_6, rep_base_se2_6, rep_val_se2_6, size_base_6, size_val_6] = random_assignment(estimates6, se6, size6, seed);

% Compute posterior means and variances for tau
[bar_tau_6, bar_V_tau_6] = compute_posterior_moments(hat_theta_6, rep_base_se2_6, 0);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_6, empirical_cov_6, cov_p_value_6] = check_CI(hat_vartheta_6, rep_val_se2_6, bar_tau_6, bar_V_tau_6, 0, cov_stat_sim);
xtick = 0:20:140;
ytick = -4:1:3;
[interval_6, order_6] = plot_intervals(hat_vartheta_6, bar_tau_6, predict_se_6, highlight_indices, xtick, ytick);
saveas(interval_6, "../Results/Benchmark_Interval_N_132_J_2.png")

% Generate the S statistics and p-value
S_stat_sim_6 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_6, S_p_value_6, PIT_plot_6] = PITs(hat_vartheta_6, bar_tau_6, predict_se_6, S_stat_sim_6, bin_num, ytick);
saveas(PIT_plot_6, "../Results/Benchmark_PIT_N_132_J_2.png")


%% Case 7: N=48, J=5 (three irreplicable studies excluded, nu=0)
N = 48;
J = 5;
highlight_indices = [];

% Extract study estimates and ses
sheet3 = 'N=48, J=5';
estimates7 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'C3:H50'); 
se7 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'N3:S50');   
size7 = xlsread('../Data/ManyLabs_Data.xlsx', sheet3, 'Y3:AD50');

% Randomly assign baseline and validation studies
[hat_theta_7, hat_vartheta_7, rep_base_se2_7, rep_val_se2_7, size_base_7, size_val_7] = random_assignment(estimates7, se7, size7, seed);

% Compute posterior means and variances for tau
[bar_tau_7, bar_V_tau_7] = compute_posterior_moments(hat_theta_7, rep_base_se2_7, 0);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_7, empirical_cov_7, cov_p_value_7] = check_CI(hat_vartheta_7, rep_val_se2_7, bar_tau_7, bar_V_tau_7, 0, cov_stat_sim);
xtick = 0:10:50;
ytick = -3:1:3;
[interval_7, order_7] = plot_intervals(hat_vartheta_7, bar_tau_7, predict_se_7, highlight_indices, xtick, ytick);
saveas(interval_7, "../Results/Benchmark_Interval_N_48_J_5.png")

% Generate the S statistics and p-value
S_stat_sim_7 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_7, S_p_value_7, PIT_plot_7] = PITs(hat_vartheta_7, bar_tau_7, predict_se_7, S_stat_sim_7, bin_num, ytick);
saveas(PIT_plot_7, "../Results/Benchmark_PIT_N_48_J_5.png")

%% Case 8: N=96, J=2 (three irreplicable studies excluded, nu=0)
N = 96;
J = 2;
highlight_indices = [];

% Extract study estimates and ses
sheet4 = 'N=96, J=2';
estimates8 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'C3:E98'); 
se8 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'K3:M98');   
size8 = xlsread('../Data/ManyLabs_Data.xlsx', sheet4, 'S3:U98'); 

% Randomly assign baseline and validation studies
[hat_theta_8, hat_vartheta_8, rep_base_se2_8, rep_val_se2_8, size_base_8, size_val_8] = random_assignment(estimates8, se8, size8, seed);

% Compute posterior means and variances for tau
[bar_tau_8, bar_V_tau_8] = compute_posterior_moments(hat_theta_8, rep_base_se2_8, 0);

% Check whether the validation study estimates are located in the predicted interval
cov_stat_sim = simulate_cov_stat_distribution(N, sim_cov);
[predict_se_8, empirical_cov_8, cov_p_value_8] = check_CI(hat_vartheta_8, rep_val_se2_8, bar_tau_8, bar_V_tau_8, 0, cov_stat_sim);
xtick = 0:20:100;
ytick = -4:1:3;
[interval_8, order_8] = plot_intervals(hat_vartheta_8, bar_tau_8, predict_se_8, highlight_indices, xtick, ytick);
saveas(interval_8, "../Results/Benchmark_Interval_N_96_J_2.png")

% Generate the S statistics and p-value
S_stat_sim_8 = simulate_S_stat_distribution(N, sim_S);
ytick = 0:0.05:0.35;
[S_stat_8, S_p_value_8, PIT_plot_8] = PITs(hat_vartheta_8, bar_tau_8, predict_se_8, S_stat_sim_8, bin_num, ytick);
saveas(PIT_plot_8, "../Results/Benchmark_PIT_N_96_J_2.png")

%% Summarize the benchmark results
Benchmark_empirical_cov = [empirical_cov_5, empirical_cov_6, empirical_cov_7, empirical_cov_8]
Benchmark_cov_p_value = [cov_p_value_5, cov_p_value_6, cov_p_value_7, cov_p_value_8]
Benchmark_S_stat = [S_stat_5, S_stat_6, S_stat_7, S_stat_8]
Benchmark_S_p_value = [S_p_value_5, S_p_value_6, S_p_value_7, S_p_value_8]
